#library
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
import seaborn as sn
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
import plotly.graph_objects as go
#dataset
# Load the glaucoma dataset from CSV (63 columns: 62 numeric features
# plus a string 'Class' label, per the head() output below).
data_load = pd.read_csv('GlaucomaM.csv')
# Preview the first five rows to sanity-check the load
data_load.head()
| ag | at | as | an | ai | eag | eat | eas | ean | eai | ... | tmt | tms | tmn | tmi | mr | rnf | mdic | emd | mv | Class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2.220 | 0.354 | 0.580 | 0.686 | 0.601 | 1.267 | 0.336 | 0.346 | 0.255 | 0.331 | ... | -0.018 | -0.230 | -0.510 | -0.158 | 0.841 | 0.410 | 0.137 | 0.239 | 0.035 | normal |
| 1 | 2.681 | 0.475 | 0.672 | 0.868 | 0.667 | 2.053 | 0.440 | 0.520 | 0.639 | 0.454 | ... | -0.014 | -0.165 | -0.317 | -0.192 | 0.924 | 0.256 | 0.252 | 0.329 | 0.022 | normal |
| 2 | 1.979 | 0.343 | 0.508 | 0.624 | 0.504 | 1.200 | 0.299 | 0.396 | 0.259 | 0.246 | ... | -0.097 | -0.235 | -0.337 | -0.020 | 0.795 | 0.378 | 0.152 | 0.250 | 0.029 | normal |
| 3 | 1.747 | 0.269 | 0.476 | 0.525 | 0.476 | 0.612 | 0.147 | 0.017 | 0.044 | 0.405 | ... | -0.035 | -0.449 | -0.217 | -0.091 | 0.746 | 0.200 | 0.027 | 0.078 | 0.023 | normal |
| 4 | 2.990 | 0.599 | 0.686 | 1.039 | 0.667 | 2.513 | 0.543 | 0.607 | 0.871 | 0.492 | ... | -0.105 | 0.084 | -0.012 | -0.054 | 0.977 | 0.193 | 0.297 | 0.354 | 0.034 | normal |
5 rows × 63 columns
# Count missing values per column (the output below shows zero everywhere,
# so no imputation is needed)
data_load.isnull().sum()
ag 0
at 0
as 0
an 0
ai 0
..
rnf 0
mdic 0
emd 0
mv 0
Class 0
Length: 63, dtype: int64
# Encode the string 'Class' labels as integers in place.
# The printed Series below shows the head rows (labelled 'normal'
# above) encode to 1, so 1 = normal and 0 = glaucoma.
label_encoder = LabelEncoder()
data_load['Class'] = label_encoder.fit_transform(data_load['Class'])
# Display the encoded column to confirm the transformation
data_load['Class']
0 1
1 1
2 1
3 1
4 1
..
191 0
192 0
193 0
194 0
195 0
Name: Class, Length: 196, dtype: int64
# Candidate classifiers and the hyper-parameter grid to search for each.
# Keys are display names; each entry carries an unfitted estimator and
# the parameter grid handed to GridSearchCV below.
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params': {'C': [1, 10, 20], 'kernel': ['rbf', 'linear']},
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params': {'n_estimators': [1, 5, 10]},
    },
    'logistic_regression': {
        'model': LogisticRegression(solver='liblinear', multi_class='auto'),
        'params': {'C': [1, 5, 10]},
    },
}
# Render the configuration as a table for inspection
pd.DataFrame(model_params)
| svm | random_forest | logistic_regression | |
|---|---|---|---|
| model | SVC(gamma='auto') | RandomForestClassifier() | LogisticRegression(solver='liblinear') |
| params | {'C': [1, 10, 20], 'kernel': ['rbf', 'linear']} | {'n_estimators': [1, 5, 10]} | {'C': [1, 5, 10]} |
# Run a 3-fold grid search for every candidate model and collect each
# model's best cross-validated accuracy and winning parameter set.
scores = []
for model_name, spec in model_params.items():
    search = GridSearchCV(spec['model'], spec['params'],
                          cv=3, return_train_score=False)
    # Features are every column except the encoded 'Class' target
    search.fit(data_load.drop('Class', axis='columns'), data_load.Class)
    scores.append({
        'model': model_name,
        'best_score': search.best_score_,
        'best_params': search.best_params_,
    })
# Summarise the search results in a DataFrame (also plotted below)
df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
df
| model | best_score | best_params | |
|---|---|---|---|
| 0 | svm | 0.882517 | {'C': 1, 'kernel': 'linear'} |
| 1 | random_forest | 0.836519 | {'n_estimators': 10} |
| 2 | logistic_regression | 0.851904 | {'C': 1} |
from sklearn.model_selection import train_test_split

# Hold out 10% of the samples for a final evaluation (fixed seed for
# reproducibility), then fit a linear-kernel SVM on the training split.
X = data_load.drop('Class', axis=1)
y = data_load.Class
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=0)
model = SVC(C=1.5, kernel='linear', random_state=42)
model.fit(X_train, y_train)
SVC(C=1.5, kernel='linear', random_state=42)
# Mean accuracy of the fitted SVM on the held-out test set (0.95 below)
model.score(X_test, y_test)
0.95
# Map encoded class ids back to human-readable names.
# The encoder output above shows the rows labelled 'normal' in head()
# encode to 1, so 1 = normal and 0 = glaucoma; the original mapping
# was inverted (0:'Normal', 1:'Gulcoma') and misspelled "Glaucoma".
classes1 = {
    0: 'Glaucoma',
    1: 'Normal',
}
# Predict encoded class ids (0/1) for the held-out test samples
y_predicted = model.predict(X_test)
# Display the raw prediction array
y_predicted
array([1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0])
# Human-readable label for the fourth test prediction (code 0 per the
# array above)
classes1[y_predicted[3]]
'Normal'
# Confusion matrix on the test split: rows are true labels, columns are
# predicted labels, both ordered by sorted class code (0, 1)
cm = confusion_matrix(y_test, y_predicted)
cm
array([[10, 1],
[ 0, 9]])
# Plot the confusion matrix as an interactive heatmap.
# confusion_matrix orders rows/columns by sorted class code, i.e.
# 0 = glaucoma first, then 1 = normal — the original axis labels
# ('Normal', 'Glucoma') were in the wrong order and misspelled.
fig = go.Figure(data=go.Heatmap(
    z=cm,
    x=['Glaucoma', 'Normal'],   # predicted label per column
    y=['Glaucoma', 'Normal'],   # true label per row
    hoverongaps=False))
fig.show()
import matplotlib.pyplot as plt

# Compare each algorithm's best cross-validated score from the grid
# search as a bar chart.
plt.bar(df['model'], df['best_score'])
plt.xlabel('Algorithms')
plt.ylabel('Best Score')
plt.show()